#!/usr/bin/env bash

# Run OpenNMT-py preprocessing for the "remark" parallel corpus
# (source = *_en_* files, target = *_zh_* files, judging by the file names).
#
# All paths are relative to the current working directory, exactly as in the
# plain command-line form, so invoke this script from the directory whose
# parent contains data/title_remark.
#
# Exit status: non-zero if an input file is missing, the output directory
# cannot be created, or onmt_preprocess itself fails.
set -euo pipefail

readonly data_dir=../data/title_remark
readonly save_prefix="${data_dir}/remark_output/remark"

# Fail early with a clear message instead of letting a long preprocessing
# run discover a missing input on its own.
for input_file in \
  "${data_dir}/remark_en_train.csv" \
  "${data_dir}/remark_zh_train.csv" \
  "${data_dir}/remark_en_val.csv" \
  "${data_dir}/remark_zh_val.csv"; do
  if [[ ! -r "${input_file}" ]]; then
    printf 'error: cannot read input file: %s\n' "${input_file}" >&2
    exit 1
  fi
done

# --save_data is a path *prefix*; make sure its parent directory exists so the
# tool can write its output files there.
mkdir -p -- "${save_prefix%/*}"

preprocess_args=(
  --data_type text
  --train_src "${data_dir}/remark_en_train.csv"
  --train_tgt "${data_dir}/remark_zh_train.csv"
  --valid_src "${data_dir}/remark_en_val.csv"
  --valid_tgt "${data_dir}/remark_zh_val.csv"
  --save_data "${save_prefix}"
  --shard_size 2000000
  --num_threads 8
  --src_vocab_size 50000
  --tgt_vocab_size 50000
  --src_seq_length 50
  --tgt_seq_length 50
  --report_every 100000
  # Log-file verbosity, given as a Python logging level name or number:
  #   CRITICAL=50, ERROR=40, WARNING=30, INFO=20, DEBUG=10, NOTSET=0
  --log_file_level 0
)

onmt_preprocess "${preprocess_args[@]}"